VAST Challenge 3: Economics
packages = c('ggiraph', 'plotly', 'DT', 'patchwork', 'gganimate', 'tidyverse',
'readxl', 'gifski', 'gapminder', 'tidyverse', 'rmarkdown',
'ggdist', 'ggridges', 'patchwork', 'ggthemes', 'hrbrthemes','ggrepel',
'ggforce')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T) }
financial <- read_csv('data/FinancialJournal.csv')
paged_table(financial)
y <- as.POSIXct(financial$timestamp, format="%Y-%m-%d %H:%M:%S")
financial$year <- format(y, format="%Y")
financial$month <- format(y, format="%m")
income <- financial %>%
filter(category %in% c('Wage', 'RentAdjustment')) %>%
group_by(year, month) %>%
summarise(income = mean(amount))
outcome <- financial %>%
filter(!category %in% c('Wage', 'RentAdjustment')) %>%
group_by(year, month) %>%
summarise(outcome = mean(abs(amount)))
total <- merge(income, outcome, by=c('year', 'month'))
total$coef <- total$outcome / total$income
total$date <- paste(total$year, total$month, sep='-')
plot_ly(total, x = ~date, y = ~coef, type = 'scatter',mode = 'lines+markers') %>% layout(title="Trend of Living Standards",
xaxis = list(title = "Date"),
yaxis = list (title = "Coefficient\n(outcome/income)"))
total$remain <- (total$income - total$outcome)
total$remain <- round(total$remain, 1)
ggplot(data=total, aes(x=date, y=remain)) +
geom_bar(stat = "identity", width = 0.5, fill="steelblue") +
coord_cartesian(ylim = c(0, 160)) +
labs(y= 'Total Deposit', x= 'Date',
title = "Trend of Living Standards",
subtitle = "Highest remaining in 2022-03") +
geom_text(aes(label = remain), vjust = -1, colour = "black") +
theme(axis.title.y= element_text(angle=90),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
axis.ticks.x= element_blank(),
panel.background= element_blank(),
axis.line= element_line(color= 'grey'),
panel.grid.major.y = element_line(color = "grey",size = 0.5,linetype = 2))
wage <- financial %>%
filter(category == 'Wage') %>%
group_by(participantId) %>%
summarise(wage = mean(amount))
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
#plot_ly(wage, x = ~wage, type = "histogram")
p <- ggplot(data=wage, aes(x=wage, fill=Wage_Group)) +
geom_histogram(aes(y = ..density..)) +
geom_density(fill="red", alpha = 0.2)
ggplotly(p)
income <- financial %>%
filter(category %in% c('Wage', 'RentAdjustment')) %>%
group_by(participantId) %>%
summarise(income = sum(amount))
outcome <- financial %>%
filter(!category %in% c('Wage', 'RentAdjustment')) %>%
group_by(participantId) %>%
summarise(outcome = sum(abs(amount)))
comparison <- merge(income, outcome, by='participantId') %>%
merge(wage, by='participantId')
comparison$ratio <- comparison$outcome / comparison$income
ggplot(comparison, aes(x = ratio, y = Wage_Group)) +
geom_density_ridges(calc_ecdf = TRUE,
quantiles = 4,
quantile_lines = TRUE,
alpha = .2) +
theme_ridges() +
scale_fill_viridis_d(name = "Quartiles")+
ggtitle("Distribution of outcome/income Ratio in Different Wage Group")+
theme(plot.title = element_text(size = 12), legend.position = "top")